#! /usr/bin/env python2.7
#***************************************************************************
#*   Copyright (C) 2013 by Edson Borin                                     *
#*   edson@ic.unicamp.br                                                   *
#*                                                                         *
#*   This program is free software; you can redistribute it and/or modify  *
#*   it under the terms of the GNU General Public License as published by  *
#*   the Free Software Foundation; either version 2 of the License, or     *
#*   (at your option) any later version.                                   *
#*                                                                         *
#*   This program is distributed in the hope that it will be useful,       *
#*   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
#*   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
#*   GNU General Public License for more details.                          *
#*                                                                         *
#*   You should have received a copy of the GNU General Public License     *
#*   along with this program; if not, write to the                         *
#*   Free Software Foundation, Inc.,                                       *
#*   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
#**************************************************************************/

# Description
#
# This is a python script to summarize results generated by the
# run_regression.py script.
#
# Input: results dir. 
#
# a) The "results dir" contains a set of "revision directories" (r1, r2, rN)
# b) Each revision directory contains a set of information for each "build configuration" (bldbasename).
#    - bldbasename/ : directory with results for the given build configuration basename.
#    - bldbasename.info.cfg : information about the status of the regression for the given build.
#    - bldbasename.run.log : log file containing messages written to stdout and stderr during the regression execution
# c) Each "bldbasename" directory contains a set of directories and information
#    files for "use case scenarios", each one representing a different use case
#    scenario.
#    - substruct_ucs1/
#    - substruct_ucs1.info
# d) Each "use case scenario" may contain multiple steps. The performance of
#    each step may be monitored individually. Hence, each use case scenario may
#    produce multiple "step results", each one represented by a single "raw data
#    table" (RDT) result file.
#    - step1.rdt
#    - step2.rdt
#
# The following summary is generated:
#
# 1) Current performance status (current_perf_status.csv)
# 
# CSV file with the following information
# scenario_name, scenario_desc_fn, step_name, bldbasename, bld_desc_fn, best_result_rev, cur_rev, best_result, current_result, perf_loss

#Scenario,Scen. Desc. Filename,Step,Build,Build Desc. Filename,Best rev,Best rev average,Best rev conf. int.,Last Working rev,Last Working rev average,Last Working rev conf. int.,Performance loss,Last Revision Status

# scenario_name, scenario_desc_fn, step_name, bldbasename, bld_desc_fn, best_result_rev, cur_rev, best_result, current_result, perf_loss
#
# * scenario_desc_fn and bld_desc_fn contain the name of the files that
#   describes the scenario and the build configurations, respectively.
# * best_result_rev indicates the revision id of the best results obtained so
#   far for this (scenario x build_configuration).
# * perf_loss indicates the performance slowdown when comparing the
#   current_result and the best_result. The comparison takes into account the
#   95% confidence interval when comparing the averages.
#
# 2) scenario and build configuration description files (scenario_name.dsc and bldbasename.dsc)
#
# 3) Performance across build configurations.
#
# CSV file with the following information
# 

# Main steps:
# 1) load from file or generates a list with all results, where each item is the following tuple
#    (revN, build_cfg, scenario_name, step_name, average exec_time, conf. int)
#
# - May dump list to file
#
# 2) For each valid combination (scenario_name,step_name,build_cfg), it
#    generates a list of tuples (revN, average exec_time, conf. int)
#
# 3) For each valid combination (scenario_name,step_name,revN), it generates a
#    list of tuples (build_cfg, average exec_time, conf. int)

import sys
import os
import cfg_file
import rdt
import stats

default_result_dir="../results"
default_summary_dir="./summary"

# Generates:
#  a) a list with all results, where each item is the following tuple
#    (revN, build_cfg, scenario_name, step_name, average exec_time, conf. int)
#  b) a dictionary that maps scenario_name to scenario info files
#  c) a dictionary that maps build_name to build info files
# Collect the results from results_basename directory
def build_list(results_basename) :
	"""Collect every step result found under the results directory.

	The directory is expected to be laid out as
	results_basename/<rev dir>/<build dir>/<scenario dir>/<step>.rdt, with a
	"<build dir>.info.cfg" file next to each build directory and a
	"<scenario dir>.info" file next to each scenario directory.

	Returns a tuple (all_results, scenarios_desc, builds_desc):
	  - all_results: list of tuples
	    (srcver as int, rev dir, build dir, scenario dir, step name, rdt path)
	  - scenarios_desc: maps each scenario name to the value returned by
	    cfg_file.read for its ".info" file (the last one visited wins).
	  - builds_desc: maps each build name to the list of values returned by
	    cfg_file.read for its ".info.cfg" files, one per revision visited.

	Builds whose ".info.cfg" file cannot be read (cfg_file.CFGError) are
	reported on stdout and skipped. Files in a scenario directory that do not
	carry the ".rdt" extension are ignored.
	"""
	rdt_ext = ".rdt"
	all_results = []
	scenarios_info = {}
	builds_desc = {}
	# Snapshot the whole tree once: path -> (sub-directory names, file names)
	dirs = {}
	for d, subd, files in os.walk(results_basename) :
		dirs[d] = (subd, files)
	# The first level below results_basename holds the revision directories
	for rd in dirs[results_basename][0] :
		rev_path = os.path.join(results_basename, rd)
		for bd in dirs[rev_path][0] :
			build_path = os.path.join(rev_path, bd)
			build_info_fn = build_path + ".info.cfg"
			try :
				build_info_d = cfg_file.read(build_info_fn)
			except cfg_file.CFGError as e :
				print("WARNING: could not read file " + build_info_fn + " ( " + str(e) + " )")
				continue
			# Keep the description that was just read instead of re-reading the file later
			builds_desc.setdefault(bd, []).append(build_info_d)
			srcver = build_info_d["srcver"]
			for sd in dirs[build_path][0] :
				scenario_path = os.path.join(build_path, sd)
				scenarios_info[sd] = scenario_path + ".info"
				for sr in dirs[scenario_path][1] :
					step, ext = os.path.splitext(sr)
					# Only RDT files are step results; anything else would yield a bogus step name
					if ext != rdt_ext :
						continue
					all_results.append((int(srcver), rd, bd, sd, step, os.path.join(scenario_path, sr)))
	# Scenario descriptions
	scenarios_desc = {}
	for name, info_fn in scenarios_info.items() :
		scenarios_desc[name] = cfg_file.read(info_fn)
	return all_results, scenarios_desc, builds_desc

#(4932,'r4932', 'icc-12.1.3-O3-new_skylmat', 'substruct_tst4', 'tpzdohrass', './r4932/icc-12.1.3-O3-new_skylmat/substruct_tst4/tpzdohrass.rdt')

# Adds the average, confidence interval and user-time average to a result tuple
def add_av_ci(result) :
	"""Extend one result tuple with its timing statistics.

	result is a tuple (revn, revd, bld, scn, step, rdtfn); rdtfn is the path
	of the RDT file to summarize.

	Returns (revn, revd, bld, scn, step, rdtfn, av, ci, usr_av) where av is
	stats.average of the "ELAPSED" column, ci is stats.conf_int of that
	column called with 95.0, and usr_av is stats.average of the
	"SELF_RU_UTIME" column. Any statistic that raises stats.StatsError is
	reported as 0.0. If reading the RDT file raises rdt.RdtError, a warning
	is printed and all three statistics are 0.0.
	"""
	(revn, revd, bld, scn, step, rdtfn) = result
	try :
		rdt_d = rdt.read(rdtfn)
		elapsed_list = rdt.get_column_values(rdt_d, "ELAPSED")
		user_list = rdt.get_column_values(rdt_d, "SELF_RU_UTIME")
	except rdt.RdtError as e :
		# The original message referenced an undefined name here; report the RDT path
		print("WARNING: error when summarizing results for " + str(rdtfn) + " ( " + str(e) + " )")
		return (revn, revd, bld, scn, step, rdtfn, 0.0, 0.0, 0.0)
	try :
		av = stats.average(elapsed_list)
	except stats.StatsError :
		av = 0.0
	try :
		usr_av = stats.average(user_list)
	except stats.StatsError :
		usr_av = 0.0
	try :
		ci = stats.conf_int(elapsed_list, 95.0)
	except stats.StatsError :
		ci = 0.0
	return (revn, revd, bld, scn, step, rdtfn, av, ci, usr_av)
	
def verbose(msg):
	"""Print msg to stdout followed by a newline."""
	# Call form is valid on Python 2.7 and Python 3 and prints the same text
	print(msg)

def error(message, status=1):
	"""Write 'ERROR: <message>' to stderr and terminate through sys.exit(status).

	status defaults to 1 so the function can be called with only a message.
	"""
	sys.stderr.write('ERROR: '+message+'\n')
	sys.exit(status)

def fatal(message):
	"""Write 'FATAL: <message>' to stderr and terminate through sys.exit(1)."""
	sys.stderr.write('FATAL: '+message+'\n')
	sys.exit(1)

def warning(message):
	"""Emit a non-fatal diagnostic on stderr as 'WARNING: <message>' plus a newline."""
	line = "".join(('WARNING: ', message, '\n'))
	sys.stderr.write(line)



#
# CSV files containing information about results:
#
# main.csv (list of revisions tested)
# Revision , Date, Set of 
# 
# For each pair (bldname,RDT result file), the script scans the set of revisions
# looking for variations on the performance.

# results/r1/...
#        /r2/... 
#        /rN/
#        /rN/bld-1.info.cfg
#        /rN/bld-1.run.log
#        /rN/bld-1/...
#        /rN/bld-n.info.cfg
#        /rN/bld-n.run.log
#        /rN/bld-n/test1.info
#        /rN/bld-n/test1/...
#        /rN/bld-n/testn.info
#        /rN/bld-n/testn/res1.rdt
#        /rN/bld-n/testn/resN.rdt
#      

#   results
# Rationale:
#
# $> build.py -c bldcfg [-a appcfg] [-v 123] [-l output_log] [-o status]  
#
# calls system_environment.py to verify if the current environment is equivalent to cfg/env/current 
# and return the environment version.
# (STATUS,ENVVER) = env-check.py 
# - STATUS=OK|ERR
# - ENVVER=[cfg/env/current] 
#
# VERIFY(STATUS)
#
# calls source-setup.py to setup the source code (place a copy of the code at SRCDIR).
# (STATUS,SRCDIR,SRCVER,MODIFIED) = source-setup.py [-a appcfg] [-v version] 
# - STATUS=OK|ERR
# - SRCDIR=work/src/appcfg.id
# - SRCVER=current_version
# - MODIFIED=TRUE|FALSE
#
# VERIFY(STATUS)
# SUFFIX="SRCVER-ENVVER"
# if [MODIFIED]: SUFFIX="SUFFIX-m"
#
# calls build-setup.py to configure the build (it may use autotools, cmake, etc...)
# (STATUS,BLDDIR,INSDIR,SUFFIX) = build-setup.py -a appcfg -c bldcfg
# - STATUS=OK|ERR
# - BLDDIR=work/build/appcfg.id-bldcfg.id-SUFFIX
# - INSDIR=results/build/appcfg.id-bldcfg.id-SUFFIX
#
# VERIFY(STATUS)
# 
# calls build-make.py to make and install the tool (it may use your favorite make tool)
# (STATUS) = build-make.py -b BLDDIR -i INSDIR
# 
# VERIFY(STATUS)
#
# if(STATUS==OK)
#  move log files (including the CMakeCache.txt) to INSDIR
# else
#  gen report on BLDDIR
#
# The log messages are directed to output_log if -l is provided, and
# the build status is written to the status file if -o is provided.
#  - It contains information about the build status and can be parsed to
#    check for errors or retrieve the install dir.

# Configuration files:
# cfg/env/current: contains the name of the file that describes the current environment.
# cfg/app/appcfg: information about the application to be built
#  - appcfg.id: application short name (ex: neopz)
# cfg/build/bldcfg: cmake (configure) flags to configure the building
#  - bldcfg.id: cmake|autotools configuration short name (ex: gcc47-O3)

def usage():
	"""Print the command-line help text to stdout and exit with status 0."""
	lines = [
		"\nUsage: summarize_results.py [-r results_dir] [-o summary_dir] [-h]\n",
		"\nARGUMENTS",
		"\t-r results_dir: directory containing the performance regression results. Default = "+default_result_dir,
		"\t-o summary_dir: directory to generate the summary output files. Default = "+default_summary_dir,
		"\t-h            : display this usage message.",
		"\nDESCRIPTION",
		"\tThe summarize script summarizes the results generated by the run_regression.py script.",
	]
	# One print of the joined lines emits the same text as one print per line
	print("\n".join(lines))
	sys.exit(0)

# Parse arguments
import getopt
result_dir=default_result_dir
summary_dir=default_summary_dir
try:
	opts, extra_args = getopt.getopt(sys.argv[1:], 'r:o:h')
except getopt.GetoptError as e:
	# Unknown option or missing option argument: report it instead of dumping a traceback
	fatal(str(e))
for flag, value in opts:
	if flag   == '-r'  : result_dir=value
	elif flag == '-o'  : summary_dir=value
	elif flag == '-h'  : usage()

# Check result dir path
if not os.path.isdir(result_dir):
	fatal("Invalid results directory path: "+result_dir)

# Create the summary directory
if not os.path.isdir(summary_dir) :
	try:
		os.makedirs(summary_dir)
	except OSError:
		fatal("Error when creating the summary directory: "+summary_dir)

# Collect results
res,sc_desc,bd_desc = build_list(result_dir)

# Compute average and confidence intervals for each RDT file
res = [add_av_ci(r) for r in res]

#for r in res :
#	print r

# 2) For each valid combination (scenario_name,step_name,build_cfg), it
#    generates a list of tuples (revN, average exec_time, conf. int, user time average)

step_results = {}
for (revn,revd,bld,scn,step,rdtfn,av,ci,usr_av) in res :
	step_results.setdefault((scn,step,bld), []).append((revn,av,ci,usr_av))


# Write performance per scenario: one CSV per (build, scenario, step), rows sorted by revision
for (scn,step,bld),v in step_results.items() :
	fn=os.path.join(summary_dir,str(bld)+"."+str(scn)+"."+str(step)+".csv")
	try :
		# The with-statement closes the file even when a write fails
		with open(fn,'w') as out :
			out.write("Revision,Average,Conf. Interval\n")
			for (revn,av,ci,usr_av) in sorted(v,key=lambda entry : entry[0]) :
				out.write(str(revn)+","+str(av)+","+str(ci)+"\n")
	except (IOError, OSError) :
		# Best effort: a file that cannot be written does not abort the summary
		print("WARNING: could not write results to file "+fn)


# Write perf stats
perf_stats_fn=os.path.join(summary_dir,"current_perf_status.csv")
try:
	f = open(perf_stats_fn, 'w')
except IOError:
	# error() needs an exit status; exit with 1 like fatal() does
	error('Could not open file for writting: '+perf_stats_fn, 1)

def upper_interval(result) :
	"""Return av + ci for a (revn, av, ci, usr_av) result tuple.

	Used as a sort/min key: it is the upper bound of the interval av +/- ci.
	"""
	(revn, av, ci, usr_av) = result
	return (av+ci)

def process(v) :
	"""Summarize the per-revision results of one (scenario, step, build).

	v is a non-empty list of (revn, av, ci, usr_av) tuples.

	The "best" entry is the one with the smallest av + ci. The "last working"
	entry starts as the best one and is replaced by any entry with a strictly
	higher revision number, so it ends up at the highest revision in v.
	perf_loss is (last av - last ci) / (best av + best ci), clamped to a
	minimum of 1.0; if the denominator is zero a warning is issued and
	perf_loss becomes 1.0 through the clamp.

	Returns (best_res_rev, best_res_av, best_res_ci, last_w_res_rev,
	last_w_res_av, last_w_res_ci, last_w_res_usr_av, perf_loss).
	"""
	best = min(v,key=upper_interval)
	last = best
	for entry in v :
		if entry[0] > last[0] : last = entry
	(best_res_rev,best_res_av,best_res_ci,best_res_usr_av) = best
	(last_w_res_rev,last_w_res_av,last_w_res_ci,last_w_res_usr_av) = last
	try :
		# float() forces true division on Python 2 even if the inputs are integers
		perf_loss = float(last_w_res_av-last_w_res_ci) / (best_res_av+best_res_ci)
	except ZeroDivisionError :
		warning("Could not comput performance loss because best result average + ci = "+str(best_res_av+best_res_ci))
		perf_loss = 0
	if perf_loss < 1.0 : perf_loss = 1.0
	return (best_res_rev,best_res_av,best_res_ci,last_w_res_rev,last_w_res_av,last_w_res_ci,last_w_res_usr_av,perf_loss)

# Build one summary row per (scenario, step, build) combination.
# Row layout: (scn, scn info filename, step, bld, bld info filename,
#              best rev, best av/1000, best ci/1000,
#              last rev, last av/1000, last ci/1000, user/wall ratio, perf_loss)
perf_stats_list = []
for (scn,step,bld),v in step_results.items() :
	if not v :
		continue
	(best_res_rev,best_res_av,best_res_ci,last_w_res_rev,last_w_res_av,last_w_res_ci,last_w_res_usr_av,perf_loss) = process(v)
	try:
		usr_wall=last_w_res_usr_av/last_w_res_av
	except ZeroDivisionError:
		# No elapsed time available: flag the ratio with a negative value
		usr_wall=-1.0
	# Divide by 1000.0 so integer inputs are not truncated on Python 2
	perf_stats_list.append((scn,scn+".info",step,bld,bld+".info",
			       best_res_rev,best_res_av/1000.0,best_res_ci/1000.0,
			       last_w_res_rev,last_w_res_av/1000.0,last_w_res_ci/1000.0, usr_wall,
			       perf_loss))

def max_perf_loss_key(i) :
	"""Sort key returning the perf_loss field of a perf_stats_list row.

	Rows are 13-element tuples whose last two fields are the user/wall ratio
	(index 11) and perf_loss (index 12).
	"""
	return i[12]

# Header of current_perf_status.csv; the column order matches the row format below
f.write("Step / Scenario,Scen. Desc. Filename,Build,Build Desc. Filename,"+
	"Best rev,Best rev average,Best rev conf. int.,"+
	"Last Working rev,Last Working rev average,Last Working rev conf. int.,"+
	"Performance loss,User/Wall,Scenario Description\n") #, Last rev, Last revision status\n")

# One row per (scenario, step, build), ordered by max_perf_loss_key, largest first
for row in sorted(perf_stats_list,key=max_perf_loss_key,reverse=True) :
	(scn,scn_desc_fn,step,bld,bld_desc_fn,
	 best_res_rev,best_res_av,best_res_ci,
	 last_w_res_rev,last_w_res_av,last_w_res_ci,
	 last_w_res_user_wall,perf_loss) = row
	try:
		scn_desc_d=sc_desc[scn]
		desc=cfg_file.getfld(scn_desc_d,"test_desc")
	except cfg_file.CFGError as e:
		print("WARNING: "+str(e))
		desc="???"
	f.write("%s,%d,%.4f,%.4f,%d,%.4f,%.4f,%.3f,%.4f,%s\n" %
		(step+" / "+scn+","+scn_desc_fn+","+bld+","+bld_desc_fn,
		 best_res_rev,best_res_av,best_res_ci,
		 last_w_res_rev,last_w_res_av,last_w_res_ci,
		 perf_loss,last_w_res_user_wall,desc))
f.close()

# Copy each scenario description into the summary directory
for scn,sc_d in sc_desc.items() :
	scn_info_fn=os.path.join(summary_dir,scn+".info")
	try:
		cfg_file.write(scn_info_fn,sc_d)
	except cfg_file.CFGError as e :
		warning(str(e))

# For each build configuration, write a csv file (*.build_run_status.csv)
# containing the build and run status for each source code revision
for bdn,bd_d in bd_desc.items() :
	bd_info_fn=os.path.join(summary_dir,bdn+".build_run_status.csv")
	try:
		# The with-statement closes the file once all revisions are written
		with open(bd_info_fn, 'w') as out :
			out.write("revision,build_status,run_status\n")
			for i in bd_d :
				try:
					rev=cfg_file.getfld(i,"srcver")
					bdst=cfg_file.getfld(i,"build_status")
					runst=cfg_file.getfld(i,"run_status")
					out.write(str(rev)+","+str(bdst)+","+str(runst)+"\n")
				except cfg_file.CFGError as e:
					# A revision with missing fields is skipped, not fatal
					warning(str(e))
	except IOError:
		# The original raised undefined names here; abort with a clear message instead
		fatal('could not write build/run status file: '+bd_info_fn)


#
# 3) For each valid combination (scenario_name,step_name,revN), it generates a
#    list of tuples (build_cfg, average exec_time, conf. int)


#('r4932', 'icc-12.1.3-O3-new_skylmat', 'substruct_tst4', 'tpzdohrass', './r4932/icc-12.1.3-O3-new_skylmat/substruct_tst4/tpzdohrass.rdt')
#    (revN, build_cfg, scenario_name, step_name, average exec_time, conf. int)

print("Done...")
sys.exit(0)
